import sys
sys.path.append("..")
from database.db_handler import MysqlHander
from common.my_http import MyHttp
import urllib.request
from bs4 import BeautifulSoup
from common.entity_data import EntityData
import glob
import jieba
import re
import threading
import hashlib

'''
Ports (港口): collects port names per region from gangkou.bmcx.com.
'''

class GangkouData:
    """Scrape port names from gangkou.bmcx.com and save them as entity data.

    The index page links to one page per region.  Each region page holds a
    table of ports; the names are gathered per region and for the whole
    site, then handed to ``EntityData``.
    """

    # Site root; region links found on the index page are resolved against it.
    _BASE_URL = "https://gangkou.bmcx.com/"

    # A region whose table has at most this many rows (first row included)
    # gets no entity of its own; its ports still go into the site-wide list.
    _SMALL_REGION_MAX_ROWS = 6

    def __init__(self):
        # Storage backend that receives the collected names.
        self.entity = EntityData()
        # Region name (link text on the index page) -> absolute page URL.
        self.diqu_url = {}

    @staticmethod
    def _port_name(cell_text):
        """Return the port name for a table cell's text.

        Everything from the first full-width "（" onwards is dropped, ASCII
        spaces are removed, and the suffix "港" is appended.
        """
        return cell_text.split("（")[0].replace(" ", "") + "港"

    @staticmethod
    def _join_names(names):
        """Join *names* into one string, each name preceded by "|".

        The leading "|" before the first name is kept on purpose: it is the
        format the original string concatenation produced.
        """
        return "".join("|" + name for name in names)

    @staticmethod
    def _port_rows(page):
        """Return the ``tr`` rows of the table nested in the page's first table.

        Returns an empty list when either table is missing, so such a page
        is skipped the same way as a page whose table has no rows.
        """
        outer = page.find(name="table")
        inner = outer.find(name="table") if outer is not None else None
        if inner is None:
            return []
        return inner.find_all(name="tr")

    def _save_entity(self, entity_name, joined_names):
        """Store *joined_names* under *entity_name*: add, then update.

        The fixed arguments (empty strings, 100, 1000003) are forwarded
        exactly as before; their meaning is defined by ``EntityData``.
        NOTE(review): add followed by update with identical arguments
        presumably acts as insert-then-refresh -- confirm in EntityData.
        """
        args = (entity_name, "", 100, 1000003, "", "", joined_names)
        self.entity.add_entity_data(*args)
        self.entity.update_entity_data(*args)

    def get_url_list(self):
        """Collect region URLs, scrape every region page and store the names.

        Fills ``self.diqu_url`` from the links in the second table of the
        index page, then visits each region page.  For every table row after
        the first that has exactly four cells, the second cell yields one
        port name.  Regions with more than ``_SMALL_REGION_MAX_ROWS`` rows
        are stored as "<region>地区港口"; all names together are stored as
        "世界各地港口".  Progress is printed to stdout.
        """
        # Local import: keeps this block self-contained without touching
        # the file's import section.
        from urllib.parse import urljoin

        index_page = MyHttp.bs4_utf8_data(self._BASE_URL)
        for link in index_page.find_all(name="table")[1].find_all("a"):
            href = link.get("href")
            if not href:
                # Anchor without a target: nothing to visit.
                continue
            # urljoin gives the same result as plain concatenation for
            # root-relative links and also copes with absolute/relative ones.
            self.diqu_url[link.text] = urljoin(self._BASE_URL, href)

        all_names = []
        for region, region_url in self.diqu_url.items():
            print(region_url)
            rows = self._port_rows(MyHttp.bs4_utf8_data(region_url))
            if not rows:
                continue

            region_names = []
            # The first row is skipped (presumably the table header).
            for row in rows[1:]:
                cells = row.find_all(name="td")
                if len(cells) != 4:
                    continue
                region_names.append(self._port_name(cells[1].text))

            all_names.extend(region_names)
            name_set = self._join_names(region_names)
            print(region + ":" + name_set)

            if len(rows) <= self._SMALL_REGION_MAX_ROWS:
                continue
            self._save_entity(region + "地区港口", name_set)

        self._save_entity("世界各地港口", self._join_names(all_names))

    def get_data(self):
        """Run the complete scrape-and-store pass."""
        self.get_url_list()

        

def gangkou_api():
    """Create a ``GangkouData`` scraper and run its ``get_data`` pass."""
    GangkouData().get_data()

# Run the scrape only when this file is executed as a script.
if "__main__" == __name__:
    gangkou_api()
